We will demonstrate several things which can be done with Jupyter Notebooks.

First we load the standard libraries for analyzing and plotting data.

import numpy as np 
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt

DEMO 1: City of Chicago Budget

We will make a piechart of the City of Chicago budget.
  1. First let’s display the PDF with the budget details.

class PDF(object):
    """Display wrapper that embeds a PDF file in notebook output.

    The object carries two rich representations: an HTML ``<iframe>`` and a
    LaTeX ``\\includegraphics`` command.

    Args:
        pdf: Path or URL of the PDF file to embed.
        size: ``(width, height)`` written into the iframe's ``width`` and
            ``height`` attributes.
    """

    def __init__(self, pdf, size=(300, 300)):
        self.pdf = pdf
        self.size = size

    def _repr_html_(self):
        """Return an ``<iframe>`` element whose source is the PDF."""
        import html  # local import keeps this cell self-contained

        # Attribute values are quoted and escaped so that a path containing
        # spaces or quote characters still yields well-formed HTML.
        return '<iframe src="{0}" width="{1}" height="{2}"></iframe>'.format(
            html.escape(str(self.pdf), quote=True),
            html.escape(str(self.size[0]), quote=True),
            html.escape(str(self.size[1]), quote=True),
        )

    def _repr_latex_(self):
        """Return a LaTeX command that includes the PDF at full text width."""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)
PDF('ChicagoBudget.pdf',size=(700,400))
  1. Next let’s read an Excel sheet with the summary of the budget.

# Load the budget summary spreadsheet into a DataFrame and display it.
# NOTE: reading this .xlsx file needs the optional 'openpyxl' package; without
# it read_excel raises ImportError (install openpyxl with pip or conda).
budget=pd.read_excel('ChicagoBudget.xlsx')
budget
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
File //anaconda3/lib/python3.10/site-packages/pandas/compat/_optional.py:142, in import_optional_dependency(name, extra, errors, min_version)
    141 try:
--> 142     module = importlib.import_module(name)
    143 except ImportError:

File //anaconda3/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
    125         level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)

File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)

File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)

File <frozen importlib._bootstrap>:1004, in _find_and_load_unlocked(name, import_)

ModuleNotFoundError: No module named 'openpyxl'

During handling of the above exception, another exception occurred:

ImportError                               Traceback (most recent call last)
Cell In[3], line 1
----> 1 budget=pd.read_excel('ChicagoBudget.xlsx')
      2 budget

File //anaconda3/lib/python3.10/site-packages/pandas/io/excel/_base.py:478, in read_excel(io, sheet_name, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, date_format, thousands, decimal, comment, skipfooter, storage_options, dtype_backend)
    476 if not isinstance(io, ExcelFile):
    477     should_close = True
--> 478     io = ExcelFile(io, storage_options=storage_options, engine=engine)
    479 elif engine and engine != io.engine:
    480     raise ValueError(
    481         "Engine should not be specified when passing "
    482         "an ExcelFile - ExcelFile already has the engine set"
    483     )

File //anaconda3/lib/python3.10/site-packages/pandas/io/excel/_base.py:1513, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
   1510 self.engine = engine
   1511 self.storage_options = storage_options
-> 1513 self._reader = self._engines[engine](self._io, storage_options=storage_options)

File //anaconda3/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py:548, in OpenpyxlReader.__init__(self, filepath_or_buffer, storage_options)
    533 @doc(storage_options=_shared_docs["storage_options"])
    534 def __init__(
    535     self,
    536     filepath_or_buffer: FilePath | ReadBuffer[bytes],
    537     storage_options: StorageOptions = None,
    538 ) -> None:
    539     """
    540     Reader using openpyxl engine.
    541 
   (...)
    546     {storage_options}
    547     """
--> 548     import_optional_dependency("openpyxl")
    549     super().__init__(filepath_or_buffer, storage_options=storage_options)

File //anaconda3/lib/python3.10/site-packages/pandas/compat/_optional.py:145, in import_optional_dependency(name, extra, errors, min_version)
    143 except ImportError:
    144     if errors == "raise":
--> 145         raise ImportError(msg)
    146     return None
    148 # Handle submodules: if we have submodule, grab parent module from sys.modules

ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.
  1. A pie chart will show us the proportions.

# Draw the budget as a pie chart and save it to Budget.png.
fig, ax = plt.subplots(figsize=(3, 3))  # figsize is (width, height) in inches
plt.rcParams['font.size'] = 3  # small default font so the wedge labels fit the small figure
budget_items = budget["EXPENSE"]  # categories
budget_amounts = budget["2023 BUDGET"]  # amounts
total = sum(budget_amounts)

# Draw on the Axes created above and keep `ax` bound to it (the original
# rebound `ax` to the tuple returned by plt.pie).
# autopct receives each wedge's percentage; convert it back to a dollar amount.
ax.pie(
    budget_amounts,
    labels=budget_items,
    autopct=lambda p: '${:.0f}'.format(p * total / 100),
)
ax.set_title('Budget Breakdown $'+str(total)+' (13+ billion) in Expenses', size=5)
fig.savefig('Budget.png')  # save the pie chart to the file Budget.png
../../_images/b1b06a47a2d7b6ba9c7597fa76e2cab41e615d2177f24e4f2cfb9d26d197a033.png

Demo 2 Pixel Images

We can increase the resolution of images by increasing the number of pixels.
# PACKAGE: DO NOT EDIT THIS CELL
%matplotlib inline
from ipywidgets import interact
import cv2, os
!pip install opencv-python
Requirement already satisfied: opencv-python in c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages (4.6.0.66)
Requirement already satisfied: numpy>=1.13.3 in c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages (from opencv-python) (1.19.3)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -cipy (c:\users\pisihara\appdata\local\continuum\anaconda3\lib\site-packages)
def makepixelimage(folder, N):
    """Show the first readable image in *folder* as an N x N grayscale image.

    Files are visited in sorted file-name order. The first one that OpenCV
    can decode is loaded as grayscale, resized to ``N`` x ``N`` pixels and
    drawn with matplotlib.

    Args:
        folder: Directory that contains the image files.
        N: Width and height, in pixels, of the resized image.

    Returns:
        The object returned by ``plt.imshow`` for the resized image.

    Raises:
        ValueError: If ``folder`` holds no file that OpenCV can read.
    """
    dsize = (N, N)  # target pixel image size

    # os.listdir returns entries in arbitrary order; sorting makes
    # "the first image" the same on every run and every machine.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue  # skip sub-directories
        img = cv2.imread(path, 0)  # flag 0: interpret the image as grayscale
        if img is None:
            # cv2.imread signals an undecodable file (hidden files, text
            # files, ...) by returning None instead of raising; skip it.
            continue
        return plt.imshow(cv2.resize(img, dsize), cmap="gray")

    raise ValueError("no readable image file found in {!r}".format(folder))
makepixelimage("images", 8)
<matplotlib.image.AxesImage at 0x246a88c7fd0>
../../_images/642b97203473aef1246aa3af430a5531a02682ea1d65f09873dbbea5bb13e3eb.png
makepixelimage("images", 16) #16x16 image
<matplotlib.image.AxesImage at 0x246a893b710>
../../_images/ff58492fc9e45decfdd5e3ff654725f7f4bf690989c97039e5463beca43e37a7.png
makepixelimage("images", 32) #32x32 image
<matplotlib.image.AxesImage at 0x246a89a31d0>
../../_images/d64ae416077a548d4b26d81874a3e48b013e37b094235583cebb861467bcd1d2.png

DEMO 3: Track NFL Player Positions

We will plot the movement of two players step by step in a given play. Dataset: NFL_play.xlsx
  1. Import special libraries.

import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation
  1. Read the Player Tracking Data

track_play=pd.read_excel('NFL_play.xlsx')
track_play.head(22)
game_play game_key play_id nfl_player_id datetime step team position jersey_number x_position y_position speed distance direction orientation acceleration sa
0 58580_001136 58580 1136 44830 2021-10-10T21:08:20.900Z -108 away CB 22 61.59 42.60 1.11 0.11 320.33 263.93 0.71 -0.64
1 58580_001136 58580 1136 42355 2021-10-10T21:08:20.900Z -108 away NT 75 59.63 24.33 0.10 0.01 7.98 227.03 0.41 0.27
2 58580_001136 58580 1136 43330 2021-10-10T21:08:20.900Z -108 away ILB 55 60.67 30.89 3.19 0.32 334.89 303.31 1.95 -1.73
3 58580_001136 58580 1136 52425 2021-10-10T21:08:20.900Z -108 home WR 88 56.59 42.86 0.13 0.01 158.78 98.31 0.32 0.02
4 58580_001136 58580 1136 43293 2021-10-10T21:08:20.900Z -108 home RB 21 51.11 26.42 0.14 0.01 144.58 78.52 0.52 0.51
5 58580_001136 58580 1136 40031 2021-10-10T21:08:20.900Z -108 away FS 23 70.53 22.03 0.32 0.03 285.68 287.44 0.28 0.27
6 58580_001136 58580 1136 41242 2021-10-10T21:08:20.900Z -108 home G 70 57.33 24.80 0.03 0.01 328.04 57.38 0.07 0.07
7 58580_001136 58580 1136 52938 2021-10-10T21:08:20.900Z -108 home T 78 57.27 23.47 0.19 0.02 356.50 87.29 0.10 -0.10
8 58580_001136 58580 1136 42347 2021-10-10T21:08:20.900Z -108 home WR 19 56.23 10.68 0.07 0.01 132.91 123.39 0.19 -0.14
9 58580_001136 58580 1136 46135 2021-10-10T21:08:20.900Z -108 away OLB 59 59.90 21.14 1.58 0.16 218.10 278.39 0.48 0.22
10 58580_001136 58580 1136 43424 2021-10-10T21:08:20.900Z -108 home QB 4 57.31 26.27 0.07 0.01 207.25 93.25 0.11 0.07
11 58580_001136 58580 1136 43351 2021-10-10T21:08:20.900Z -108 away CB 24 64.39 10.89 0.80 0.08 130.49 309.04 0.44 -0.44
12 58580_001136 58580 1136 45532 2021-10-10T21:08:20.900Z -108 home TE 89 57.16 30.99 0.07 0.01 39.61 100.73 0.51 -0.14
13 58580_001136 58580 1136 46119 2021-10-10T21:08:20.900Z -108 home G 52 57.36 28.02 0.08 0.01 127.21 241.42 0.05 0.04
14 58580_001136 58580 1136 37082 2021-10-10T21:08:20.900Z -108 home T 77 57.16 29.54 0.12 0.01 145.27 85.16 0.07 0.01
15 58580_001136 58580 1136 53876 2021-10-10T21:08:20.900Z -108 away DE 91 59.65 22.71 0.23 0.02 268.69 271.27 0.23 -0.23
16 58580_001136 58580 1136 53479 2021-10-10T21:08:20.900Z -108 away OLB 51 59.47 29.52 0.24 0.02 296.78 257.94 1.12 0.26
17 58580_001136 58580 1136 52663 2021-10-10T21:08:20.900Z -108 away ILB 48 63.25 27.50 0.51 0.05 183.62 253.71 0.31 0.31
18 58580_001136 58580 1136 46206 2021-10-10T21:08:20.900Z -108 home TE 86 57.37 22.12 0.37 0.04 127.85 63.63 0.69 0.62
19 58580_001136 58580 1136 52444 2021-10-10T21:08:20.900Z -108 away FS 29 72.19 31.46 0.61 0.06 11.77 247.69 0.63 -0.33
20 58580_001136 58580 1136 47800 2021-10-10T21:08:20.900Z -108 away DE 97 59.48 26.81 0.23 0.01 346.84 247.16 1.29 0.90
21 58580_001136 58580 1136 52554 2021-10-10T21:08:20.900Z -108 home C 63 58.18 26.52 0.16 0.02 357.62 102.55 0.60 0.58
  1. Plot the positions of the players at step -108 (before the snap) of play 1136.

# Snapshot of the field at step -108: every player is drawn as a text label
# (position + player id), blue for the home team and red for everyone else.
fig = plt.figure(figsize=(8, 4))
temp = track_play[track_play["step"] == -108]

# Axis limits: the bounding box of the players plus a margin of 1 on each side.
xmin, xmax = temp["x_position"].min(), temp["x_position"].max()
ymin, ymax = temp["y_position"].min(), temp["y_position"].max()
plt.xlim(xmin - 1, xmax + 1)
plt.ylim(ymin - 1, ymax + 1)

columns = ("x_position", "y_position", "nfl_player_id", "position", "team")
for x, y, n, p, team in zip(*(temp[name] for name in columns)):
    colour = 'b' if team == 'home' else 'r'
    plt.text(x, y, p + str(n), color=colour, size=5)

plt.title("Play 1136 Step -108", size=10)
plt.show()
../../_images/b9306d72d46d5f7814477b3959f67fa72d3971e19fd2e754975f02b769e0cfbd.png
  1. Let’s define a function which gives the position of players at any step of any play.

def teampositions(data,play,step):
    """Plot where every player stands at one step of one play.

    Each player is drawn as a text label (position followed by player id) at
    their (x_position, y_position). Home-team labels are blue, all others red.

    Args:
        data: Tracking DataFrame with the columns ``play_id``, ``step``,
            ``x_position``, ``y_position``, ``nfl_player_id``, ``position``
            and ``team``.
        play: ``play_id`` value of the play to show.
        step: ``step`` value within that play.

    Raises:
        ValueError: If ``data`` has no row for the requested play and step.
    """
    stepdf = data[(data["play_id"] == play) & (data["step"] == step)]
    if stepdf.empty:
        # Without this guard the min/max below are NaN and matplotlib fails
        # with an unrelated-looking axis-limit error after opening a figure.
        raise ValueError(
            "no tracking rows for play {!r} at step {!r}".format(play, step)
        )

    # Axis limits: the bounding box of the players plus a margin of 1.
    xmin, xmax = stepdf["x_position"].min(), stepdf["x_position"].max()
    ymin, ymax = stepdf["y_position"].min(), stepdf["y_position"].max()

    fig = plt.figure(figsize=(8, 4))
    plt.xlim(xmin - 1, xmax + 1)
    plt.ylim(ymin - 1, ymax + 1)

    columns = ("x_position", "y_position", "nfl_player_id", "position", "team")
    for x, y, n, p, team in zip(*(stepdf[name] for name in columns)):
        colour = 'b' if team == 'home' else 'r'
        plt.text(x, y, p + str(n), color=colour, size=5)

    plt.title("Play"+str(play)+ "  Step"+str(step),size=10)
    plt.show()
teampositions(track_play,1136,10)
../../_images/73f96c55db5a800a08b9a10bf4b3a2ec6aa7d85e15d69cb602b4dd1af28f10f2.png
  1. Let’s define a function to animate the movement of two designated players in a specified play.

def movement(data,play,player1,player2):
    """Animate the paths of two players through one play.

    ``player1`` is drawn with blue crosses and ``player2`` with red dots; one
    marker is added per frame, in order of ``step``.

    Args:
        data: Tracking DataFrame with the columns ``play_id``, ``step``,
            ``nfl_player_id``, ``x_position`` and ``y_position``.
        play: ``play_id`` value of the play to animate.
        player1: ``nfl_player_id`` of the first player.
        player2: ``nfl_player_id`` of the second player.

    Returns:
        The ``FuncAnimation`` object that drives the animation.

    Raises:
        ValueError: If either player has no tracking rows in the play.
    """
    playdf = data[data["play_id"] == play]

    # Collect each player's positions as plain arrays ordered by step, so that
    # frame number i simply means "the i-th recorded step of that player".
    tracks = []
    for player in (player1, player2):
        playerdf = playdf[playdf["nfl_player_id"] == player].sort_values(by='step')
        if playerdf.empty:
            raise ValueError(
                "player {!r} has no tracking rows in play {!r}".format(player, play)
            )
        tracks.append(
            (playerdf["x_position"].to_numpy(), playerdf["y_position"].to_numpy())
        )
    (x1, y1), (x2, y2) = tracks

    # Axis limits cover every player in the play, plus a margin of 1.
    xmin, xmax = playdf["x_position"].min(), playdf["x_position"].max()
    ymin, ymax = playdf["y_position"].min(), playdf["y_position"].max()

    fig = plt.figure(figsize=(5, 3))
    plt.xlim(xmin - 1, xmax + 1)
    plt.ylim(ymin - 1, ymax + 1)
    # Capture the axes once: looking up plt.gca() on every frame would draw
    # into whichever figure happens to be current at that moment.
    ax = plt.gca()

    def init():
        # Starting position of both players.
        bluex, = ax.plot([x1[0]], [y1[0]])
        redo, = ax.plot([x2[0]], [y2[0]])
        return bluex, redo

    def animate(i):
        # Add one marker per player for step i.
        bluex, = ax.plot([x1[i]], [y1[i]], 'bx', ms=2, alpha=1)
        redo, = ax.plot([x2[i]], [y2[i]], 'ro', ms=2, alpha=1)
        return bluex, redo

    # Frame 0 is drawn by init(); the remaining frames are bounded by the
    # shorter of the two tracks instead of assuming 22 players per step.
    n_frames = min(len(x1), len(x2))
    ani = animation.FuncAnimation(
        fig,
        animate,
        frames=range(1, n_frames),
        init_func=init,
        interval=5,
        blit=True,
        repeat=False,
    )
    plt.show()
    return ani
  1. Let’s check the wide receiver (42347) and cornerback (43351) shown at the bottom of the chart in Step 4.

%matplotlib notebook
movement(track_play,1136,42347,43351)
<matplotlib.animation.FuncAnimation at 0x246a8d8ee48>
Traceback (most recent call last):
  File "C:\Users\pisihara\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\cbook\__init__.py", line 388, in process
    proxy(*args, **kwargs)
  File "C:\Users\pisihara\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\cbook\__init__.py", line 228, in __call__
    return mtd(*args, **kwargs)
  File "C:\Users\pisihara\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\animation.py", line 1560, in _stop
    self.event_source.remove_callback(self._loop_delay)
AttributeError: 'NoneType' object has no attribute 'remove_callback'

Demo 4 Word Clouds

Let's make a word cloud Christmas card using the song "Twelve Days of Christmas."
import wordcloud
#Define a function which counts the interesting words
def calculate_frequencies(textfile, max_words=15):
    """Render a word cloud of the interesting words in a text.

    The text is split on whitespace, each word is upper-cased and stripped of
    surrounding punctuation, common filler words are discarded, and the
    remaining words are counted. The counts are then drawn as a word cloud.

    Args:
        textfile: The text, either as a single string or as an iterable of
            lines (for example the result of ``file.readlines()``).
        max_words: Maximum number of words drawn in the cloud.

    Returns:
        The rendered cloud as an image array (``WordCloud.to_array()``).
    """
    import collections

    # Characters removed from the start and end of every word.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    # Words too common to be worth plotting; compared against whole words.
    uninteresting_words = {
        "AND", "BY", "IT", "THE", "THAT", "A", "IS", "HAD", "TO", "NOT",
        "BUT", "FOR", "OF", "WHICH", "IF", "IN", "ON", "WERE", "YE", "THOU",
    }

    # A list of lines is joined into one text; calling str() on the list
    # would mix brackets, quotes and "\n" escapes into the words.
    text = textfile if isinstance(textfile, str) else ' '.join(textfile)

    words = (token.strip(punctuations).upper() for token in text.split())
    # Whole-word comparison: a substring test would discard every word that
    # merely contains "A", "IN", "ON", ...
    counts = collections.Counter(
        word for word in words if word and word not in uninteresting_words
    )

    cloud = wordcloud.WordCloud(max_words=max_words)
    cloud.generate_from_frequencies(counts)
    return cloud.to_array()
%matplotlib notebook
# Read the song lyrics, one list entry per line of the file.
with open('twelvedays.txt', 'r') as file:
    carol = list(file)

# Turn the lyrics into a word-cloud image, then decorate it as a card:
# greeting text on top, axes hidden, result saved to card.png.
carol = calculate_frequencies(carol)
plt.imshow(carol, interpolation='nearest')
plt.text(-5, 70, "Merry Christmas!", color='r', size=40)
plt.axis('off')
plt.savefig('card.png', bbox_inches='tight')

Demo 5 Name that Tune

  1. Musical sound waves are created by rapid vibrations caused by musical instruments.

from IPython.display import YouTubeVideo
YouTubeVideo('tVYQRC1-D54')
  1. Sound waves are represented mathematically by sine waves with different frequencies.

def sinewave(frequency):
    """Plot one second of a sine wave that completes `frequency` cycles.

    Args:
        frequency: Number of cycles per second of the plotted wave.
    """
    import matplotlib.pyplot as plt

    # Build the wave: 44100 evenly spaced time stamps between 0 and 1 second.
    samples_per_second = 44100
    seconds = np.linspace(0, 1, samples_per_second)
    wave = np.sin(frequency * 2 * np.pi * seconds)

    # Draw it on a small figure of its own.
    plt.figure(figsize=(2, 1))
    plt.plot(seconds, wave)
    plt.xlabel("seconds")
sinewave(1)  #frequency=1 and 1 cycle per second
sinewave(2)  #frequency=2 and 2 cycles per second
sinewave(20) #frequency=20 and 20 cycles per second
  1. A computer can create a musical tone based on a given frequency.

def play(freq, duration=2.0):
    """Return an auto-playing audio widget of a pure tone.

    Args:
        freq: Frequency of the tone in hertz.
        duration: Length of the tone in seconds.

    Returns:
        An ``IPython.display.Audio`` object holding the generated sine wave.
    """
    import numpy as np
    from IPython.display import Audio  # library used to create sounds

    sampling_rate = 44100  # samples per second of audio
    # One time stamp per sample, spaced exactly 1/sampling_rate apart. The
    # original spread 44100 stamps over 2 seconds, which played back as a
    # 1-second tone at twice the requested frequency.
    t = np.arange(int(sampling_rate * duration)) / sampling_rate
    sound_wave = np.sin(2 * np.pi * freq * t)  # sine function formula
    return Audio(sound_wave, rate=sampling_rate, autoplay=True)
play(220) # play a sound at 220 hz 
  1. A musical scale is a sequence of frequencies.

from IPython.display import Audio 
# Note frequencies in hertz. Every note is a fixed ratio of the tonic `do`;
# the names ending in 1 are one octave (a factor of 2) above the plain ones.
# NOTE: the name `re` shadows the standard-library module of the same name.
rest = 0
do = 220
re = 9/8*do
mi = 5/4*do
fa = 4/3*do
so = 3/2*do
la = 5/3*do
ti = 15/8*do
do1 = 2*do
re1 = 2*9/8*do
mi1 = 2*5/4*do
fa1 = 2*4/3*do
so1 = 2*3/2*do
la1 = 2*5/3*do
ti1 = 2*15/8*do
do2 = 2*2*do
# One octave, from `do` up to `do1`.
scale = [do, re, mi, fa, so, la, ti, do1]
def play(song, note_duration=0.5, framerate=44100):
    """Return an auto-playing audio widget for a sequence of notes.

    Every entry of ``song`` is a frequency in hertz that is held for
    ``note_duration`` seconds; a frequency of 0 yields silence.

    NOTE: this definition rebinds the name ``play``, so the single-frequency
    ``play(freq)`` defined earlier in this notebook is replaced by it.

    Args:
        song: Sequence of note frequencies in hertz.
        note_duration: Length of each note in seconds.
        framerate: Number of audio samples per second.

    Returns:
        An ``IPython.display.Audio`` object holding the generated waveform.

    Raises:
        ValueError: If ``song`` is empty.
    """
    song = np.asarray(song, dtype=float)
    if song.size == 0:
        raise ValueError("song must contain at least one note")

    total_seconds = len(song) * note_duration
    # Time stamp of every sample; the end point itself is dropped so that no
    # sample falls past the last note.
    t = np.linspace(0, total_seconds, round(framerate * total_seconds))[:-1]
    # Index of the note sounding at each time stamp. The clamp protects
    # against rounding pushing the final samples one index too far.
    song_idx = np.minimum(np.floor(t / note_duration).astype(int), len(song) - 1)
    data = np.sin(2 * np.pi * song[song_idx] * t)
    return Audio(data, rate=framerate, autoplay=True)
play(scale)
  1. Can you name that tune?

tune= [so, so , la, la, so, fa,mi,rest,so, so , la, la, so, fa,mi,rest,so,so,la,ti,do1,do1,re1,re1,ti,la,ti,la,so,so,la,ti,do1,do1,ti,la,so,so,rest,rest,la,la,so,fa,mi,mi,rest,rest,so,so,do,fa,mi,mi,re,re,do,do,do,do,rest,rest]
play(tune)
from IPython.display import YouTubeVideo
YouTubeVideo('L4PA-MFSM34')